from gxl_ai_utils.utils import utils_file

# --- 2k multilabel-implicit set: rewrite wav paths, fold labels into 'extra', shard ---
# Every path below is derived from this single dataset directory.
dataset_root = "/home/work_nfs23/asr_data/data/osum_chat/s2s_paralanguage/s2s_para_multilabel_implicit_2k"
wav_root = dataset_root + "/wav/"
# Directory prefixes that may occur in a record's 'wav' value; each occurrence
# is swapped for wav_root, in this order.
legacy_wav_prefixes = (
    '/home/work_nfs9/cywang/data/multi_label_withcaption/',
    '/home/work_nfs11/zhguo/code/osum_tmp/syndata/cywang/0628/multilabel_qa_2000_cleaned.jsonl_wavs/',
)

input_data_list_path = dataset_root + "/data.list"
output_data_list_path = dataset_root + "/data_new.list"

data_list = utils_file.load_dict_list_from_jsonl(input_data_list_path)
new_data_list = []
for record in data_list:
    # Point the audio path at the dataset's own wav directory.
    wav_path = record['wav']
    for old_prefix in legacy_wav_prefixes:
        wav_path = wav_path.replace(old_prefix, wav_root)
    record['wav'] = wav_path

    # Collect the top-level labels first, so a missing key fails before
    # the record's 'extra' dict has been touched.
    labels = {
        'gender': record['gender'],
        'age': record['age'],
        'q_emotion': record['emotion'],
        'style': "<" + record['style'] + ">",
    }
    extra_info = record['extra']
    extra_info.update(labels)
    # The top-level emotion is stored as 'q_emotion'; any pre-existing
    # 'emotion' entry inside 'extra' is discarded.
    extra_info.pop('emotion', None)
    record['extra'] = extra_info
    new_data_list.append(record)

utils_file.write_dict_list_to_jsonl(new_data_list, output_data_list_path)

# Show the first few rewritten records as a quick sanity check.
utils_file.print_list(new_data_list[:10])

# Imported here (after the jsonl has been written), as in the original flow.
from make_shard_common import make_shards_common
make_shards_common(
    jsonl_file=output_data_list_path,
    shards_dir=dataset_root,
    num_threads=2,
    is_jump_exist=False  # NOTE(review): presumably "do not skip existing shards" — confirm in make_shard_common
)


print('================================')
# --- 7k multilabel-implicit set: rewrite wav paths, fold labels into 'extra', shard ---
# Every path below is derived from this single dataset directory, so the
# input list, wav directory, output list and shard directory cannot drift apart.
# (A stale assignment of the 2k data.list path, which was overwritten on the
# very next line, has been removed.)
dataset_root = "/home/work_nfs23/asr_data/data/osum_chat/s2s_paralanguage/s2s_para_multilabel_implicit_7k"
wav_root = dataset_root + "/wav/"
# Directory prefix that may occur in a record's 'wav' value; replaced by wav_root.
legacy_wav_prefix = '/home/work_nfs11/cywang/data/multi_label_withcaption_7000/'

input_data_list_path = dataset_root + "/data.list"
output_data_list_path = dataset_root + "/data_new.list"

data_list = utils_file.load_dict_list_from_jsonl(input_data_list_path)
new_data_list = []
for record in data_list:
    # Point the audio path at the dataset's own wav directory.
    record['wav'] = record['wav'].replace(legacy_wav_prefix, wav_root)

    # Collect the top-level labels first, so a missing key fails before
    # the record's 'extra' dict has been touched.
    labels = {
        'gender': record['gender'],
        'age': record['age'],
        'q_emotion': record['emotion'],
        'style': "<" + record['style'] + ">",
    }
    extra_info = record['extra']
    extra_info.update(labels)
    # The top-level emotion is stored as 'q_emotion'; any pre-existing
    # 'emotion' entry inside 'extra' is discarded.
    extra_info.pop('emotion', None)
    record['extra'] = extra_info
    new_data_list.append(record)

utils_file.write_dict_list_to_jsonl(new_data_list, output_data_list_path)

# Show the first few rewritten records as a quick sanity check.
utils_file.print_list(new_data_list[:10])

# Kept local to this section so it still runs if the earlier section is skipped.
from make_shard_common import make_shards_common
make_shards_common(
    jsonl_file=output_data_list_path,
    shards_dir=dataset_root,
    num_threads=2,
    is_jump_exist=False  # NOTE(review): presumably "do not skip existing shards" — confirm in make_shard_common
)


